Questo documento va caricato in Moodle come cognome-nome-esX.Rmd dove X è il numero dell’esercizio (1, 2, 3 o 4).
scrivere qui la soluzione del primo quesito
scrivere qui la soluzione del secondo quesito
scrivere qui la soluzione del terzo quesito (sempre che ve ne siano tre, altrimenti cancellare questa sezione)
# Read the exercise data from the working directory.
dataset <- read.csv(file = "efficienza.csv")
# Roles implied by the formula below: processi is the response (vertical
# axis) and dimensione is the predictor (horizontal axis).
plot(processi ~ dimensione, data = dataset)
# Five-number summary and mean of every column.
summary(dataset)
## processi dimensione
## Min. : 1.00 Min. : 6.0
## 1st Qu.: 12.00 1st Qu.: 8.0
## Median : 26.00 Median :14.0
## Mean : 35.52 Mean :13.1
## 3rd Qu.: 48.00 3rd Qu.:17.0
## Max. :100.00 Max. :20.0
# Cut-off on processi (the response in every model of this file): rows with a
# larger value are treated as outliers.
outliers_range <- 80
# Row positions of the observations above the cut-off.
outliers <- which(dataset[["processi"]] > outliers_range)
# Note: this summarises the row positions themselves, not the processi values.
summary(outliers)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8.00 8.75 13.50 16.00 20.75 29.00
# Straight-line regression of processi on dimensione, fitted on all rows.
mod <- lm(processi ~ dimensione, data = dataset)
# Scatterplot of the data with the fitted line overlaid in red.
# A straight-line fit has only two coefficients (intercept and slope), so
# abline() is all that is needed; a curve() built on a third coefficient
# would evaluate to NA and draw nothing.
plot(processi ~ dimensione, data = dataset)
abline(mod, col = "red")
# Coefficient table and fit statistics.
summary(mod)
##
## Call:
## lm(formula = processi ~ dimensione, data = dataset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26.392 -19.051 -11.370 4.014 69.289
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 57.261 16.551 3.460 0.00181 **
## dimensione -1.659 1.199 -1.384 0.17783
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 27.94 on 27 degrees of freedom
## Multiple R-squared: 0.0662, Adjusted R-squared: 0.03162
## F-statistic: 1.914 on 1 and 27 DF, p-value: 0.1778
# Residual diagnostics for mod, shown as two panels side by side.
par(mfrow = c(1, 2))
# Left: residuals against the predictor, with a zero reference line.
plot(residuals(mod) ~ dimensione, data = dataset)
abline(h = 0, col = "red")
# Right: QQ plot of the residuals. qqPlot() is not base R; presumably
# car::qqPlot loaded outside this excerpt -- TODO confirm.
qqPlot(residuals(mod))
## [1] 8 9
# Back to a single-panel layout.
par(mfrow = c(1, 1))
# Straight-line model refitted without the rows indexed by `outliers`.
mod2 <- lm(processi ~ dimensione, data = dataset, subset = -outliers)
# Scatterplot of the same reduced set of observations.
plot(processi ~ dimensione, data = dataset, subset = -outliers)
# Coefficient table and fit statistics for the reduced-data fit.
summary(mod2)
##
## Call:
## lm(formula = processi ~ dimensione, data = dataset, subset = -outliers)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.9634 -4.6211 0.5299 4.0233 13.1877
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 74.6613 4.2659 17.50 8.67e-15 ***
## dimensione -3.8356 0.3217 -11.92 2.51e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.034 on 23 degrees of freedom
## Multiple R-squared: 0.8608, Adjusted R-squared: 0.8547
## F-statistic: 142.2 on 1 and 23 DF, p-value: 2.508e-11
# Compare the two straight-line fits on the full scatterplot:
#   red   = mod  (all rows)
#   green = mod2 (rows in `outliers` excluded)
# Both models have exactly two coefficients, so each is drawn with abline().
# A curve() that multiplies x^2 by a third coefficient evaluates to NA for
# these fits and leaves the plot without the mod2 line.
plot(processi ~ dimensione, data = dataset)
abline(mod, col = "red")
abline(mod2, col = "green")
# Residual diagnostics for mod2, shown as two panels side by side.
par(mfrow = c(1, 2))
# Left: residuals against the predictor values of the rows actually used in
# the fit (rows in `outliers` dropped), with a zero reference line.
plot(residuals(mod2) ~ dimensione[-outliers], data = dataset)
abline(h = 0, col = "red")
# Right: QQ plot of the residuals. qqPlot() is not base R; presumably
# car::qqPlot loaded outside this excerpt -- TODO confirm.
qqPlot(residuals(mod2))
## [1] 3 1
# Back to a single-panel layout.
par(mfrow = c(1, 1))
# Quadratic regression of processi on dimensione, fitted on all rows.
mod_quad <- lm(processi ~ dimensione + I(dimensione ^ 2), data = dataset)
# Plot against dimensione only. A formula with two right-hand-side terms
# makes plot() draw one scatterplot per term, so an overlaid curve would end
# up on the panel whose horizontal axis is dimensione^2.
plot(processi ~ dimensione, data = dataset)
# Fitted parabola, evaluated over the observed range of the predictor
# (dimensione), which is the variable on the horizontal axis.
curve(
  predict(mod_quad, newdata = data.frame(dimensione = x)),
  from = min(dataset$dimensione), to = max(dataset$dimensione),
  add = TRUE, col = "blue"
)
# Coefficient table and fit statistics.
summary(mod_quad)
##
## Call:
## lm(formula = processi ~ dimensione + I(dimensione^2), data = dataset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.930 -19.101 -9.357 3.668 70.423
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 78.5531 54.4679 1.442 0.161
## dimensione -5.4942 9.4102 -0.584 0.564
## I(dimensione^2) 0.1521 0.3700 0.411 0.684
##
## Residual standard error: 28.38 on 26 degrees of freedom
## Multiple R-squared: 0.07223, Adjusted R-squared: 0.0008619
## F-statistic: 1.012 on 2 and 26 DF, p-value: 0.3773
# Residual diagnostics for mod_quad, shown as two panels side by side.
par(mfrow = c(1, 2))
# Left: residuals against the predictor, with a zero reference line.
plot(residuals(mod_quad) ~ dimensione, data = dataset)
abline(h = 0, col = "red")
# Right: QQ plot of the residuals. qqPlot() is not base R; presumably
# car::qqPlot loaded outside this excerpt -- TODO confirm.
qqPlot(residuals(mod_quad))
## [1] 8 9
# Back to a single-panel layout.
par(mfrow = c(1, 1))
# Quadratic model refitted without the rows indexed by `outliers`.
mod_quad_2 <- lm(
  processi ~ dimensione + I(dimensione ^ 2),
  data = dataset, subset = -outliers
)
# The formula has two right-hand-side terms, so this draws two scatterplots
# of the reduced data: processi against dimensione and against dimensione^2.
plot(
  processi ~ dimensione + I(dimensione ^ 2),
  data = dataset, subset = -outliers
)
# Coefficient table and fit statistics for the reduced-data fit.
summary(mod_quad_2)
##
## Call:
## lm(formula = processi ~ dimensione + I(dimensione^2), data = dataset,
## subset = -outliers)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.8440 -3.4158 0.5015 4.0487 10.5842
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 104.33360 12.36306 8.439 2.4e-08 ***
## dimensione -9.18934 2.13998 -4.294 0.000294 ***
## I(dimensione^2) 0.21240 0.08412 2.525 0.019283 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.333 on 22 degrees of freedom
## Multiple R-squared: 0.892, Adjusted R-squared: 0.8822
## F-statistic: 90.9 on 2 and 22 DF, p-value: 2.321e-11
# Compare the two quadratic fits on the full scatterplot:
#   blue  = mod_quad   (all rows)
#   green = mod_quad_2 (rows in `outliers` excluded)
plot(processi ~ dimensione, data = dataset)
# The curves are functions of dimensione, so they are evaluated over the
# observed range of dimensione (the horizontal axis), not of processi.
x_from <- min(dataset$dimensione)
x_to <- max(dataset$dimensione)
curve(
  predict(mod_quad, newdata = data.frame(dimensione = x)),
  from = x_from, to = x_to, add = TRUE, col = "blue"
)
curve(
  predict(mod_quad_2, newdata = data.frame(dimensione = x)),
  from = x_from, to = x_to, add = TRUE, col = "green"
)
# Residual diagnostics for mod_quad_2, shown as two panels side by side.
par(mfrow = c(1, 2))
# Left: residuals against the predictor values of the rows actually used in
# the fit (rows in `outliers` dropped), with a zero reference line.
plot(residuals(mod_quad_2) ~ dimensione[-outliers], data = dataset)
abline(h = 0, col = "red")
# Right: QQ plot of the residuals. qqPlot() is not base R; presumably
# car::qqPlot loaded outside this excerpt -- TODO confirm.
qqPlot(residuals(mod_quad_2))
## 23 3
## 20 3
# Back to a single-panel layout.
par(mfrow = c(1, 1))
# Regression of processi on dimensione and dimensione^3 (no quadratic term),
# fitted on all rows.
mod_cubica <- lm(processi ~ dimensione + I(dimensione ^ 3), data = dataset)
# Plot against dimensione only, so that the fitted curve is overlaid on a
# panel whose horizontal axis is the predictor itself.
plot(processi ~ dimensione, data = dataset)
# Fitted curve obtained from the model itself: the third coefficient belongs
# to dimensione^3, so pairing it with x^2 by hand would draw the wrong curve.
# Evaluated over the observed range of dimensione (the horizontal axis).
curve(
  predict(mod_cubica, newdata = data.frame(dimensione = x)),
  from = min(dataset$dimensione), to = max(dataset$dimensione),
  add = TRUE, col = "blue"
)
# Coefficient table and fit statistics.
summary(mod_cubica)
##
## Call:
## lm(formula = processi ~ dimensione + I(dimensione^3), data = dataset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.450 -18.973 -9.846 3.653 70.223
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 67.385126 38.510468 1.750 0.092 .
## dimensione -3.056178 4.931776 -0.620 0.541
## I(dimensione^3) 0.002757 0.009430 0.292 0.772
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 28.43 on 26 degrees of freedom
## Multiple R-squared: 0.06926, Adjusted R-squared: -0.002335
## F-statistic: 0.9674 on 2 and 26 DF, p-value: 0.3933
# Residual diagnostics for mod_cubica, shown as two panels side by side.
par(mfrow = c(1, 2))
# Left: residuals against the predictor, with a zero reference line.
plot(residuals(mod_cubica) ~ dimensione, data = dataset)
abline(h = 0, col = "red")
# Right: QQ plot of the residuals. qqPlot() is not base R; presumably
# car::qqPlot loaded outside this excerpt -- TODO confirm.
qqPlot(residuals(mod_cubica))
## [1] 8 9
# Back to a single-panel layout.
par(mfrow = c(1, 1))
# Cubic-term model refitted without the rows indexed by `outliers`.
mod_cubica_2 <- lm(
  processi ~ dimensione + I(dimensione ^ 3),
  data = dataset, subset = -outliers
)
# The formula has two right-hand-side terms, so this draws two scatterplots
# of the reduced data: processi against dimensione and against dimensione^3.
plot(
  processi ~ dimensione + I(dimensione ^ 3),
  data = dataset, subset = -outliers
)
# Coefficient table and fit statistics for the reduced-data fit.
summary(mod_cubica_2)
##
## Call:
## lm(formula = processi ~ dimensione + I(dimensione^3), data = dataset,
## subset = -outliers)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.3676 -3.3959 0.7948 3.9177 10.6041
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 95.263293 8.670800 10.987 2.12e-10 ***
## dimensione -6.684719 1.115278 -5.994 4.94e-06 ***
## I(dimensione^3) 0.005614 0.002124 2.643 0.0148 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.266 on 22 degrees of freedom
## Multiple R-squared: 0.8943, Adjusted R-squared: 0.8847
## F-statistic: 93.09 on 2 and 22 DF, p-value: 1.835e-11
# Compare the two cubic-term fits on the full scatterplot:
#   blue  = mod_cubica   (all rows)
#   green = mod_cubica_2 (rows in `outliers` excluded)
plot(processi ~ dimensione, data = dataset)
# The curves come from predict(), so the third coefficient is applied to
# dimensione^3 as in the fitted formula (not to x^2). They are evaluated over
# the observed range of dimensione, the variable on the horizontal axis.
x_from <- min(dataset$dimensione)
x_to <- max(dataset$dimensione)
curve(
  predict(mod_cubica, newdata = data.frame(dimensione = x)),
  from = x_from, to = x_to, add = TRUE, col = "blue"
)
curve(
  predict(mod_cubica_2, newdata = data.frame(dimensione = x)),
  from = x_from, to = x_to, add = TRUE, col = "green"
)
# Residual diagnostics for mod_cubica_2, shown as two panels side by side.
par(mfrow = c(1, 2))
# Left: residuals against the predictor values of the rows actually used in
# the fit (rows in `outliers` dropped), with a zero reference line.
plot(residuals(mod_cubica_2) ~ dimensione[-outliers], data = dataset)
abline(h = 0, col = "red")
# Right: QQ plot of the residuals. qqPlot() is not base R; presumably
# car::qqPlot loaded outside this excerpt -- TODO confirm.
qqPlot(residuals(mod_cubica_2))
## 23 3
## 20 3
# Back to a single-panel layout.
par(mfrow = c(1, 1))
# Log-linear model: log of the response processi against dimensione, all rows.
# (Despite the `_x` suffix, the logarithm is applied to the response.)
mod_logaritmica_x <- lm(log(processi) ~ dimensione, data = dataset)
# Scatterplot on the log-response scale with the fitted line in red.
plot(log(processi) ~ dimensione, data = dataset)
abline(mod_logaritmica_x, col = "red")
# Coefficient table and fit statistics.
summary(mod_logaritmica_x)
##
## Call:
## lm(formula = log(processi) ~ dimensione, data = dataset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.57997 -0.39783 -0.05473 0.10326 1.91984
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.74800 0.54082 8.779 2.15e-09 ***
## dimensione -0.12045 0.03919 -3.073 0.0048 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.913 on 27 degrees of freedom
## Multiple R-squared: 0.2591, Adjusted R-squared: 0.2317
## F-statistic: 9.445 on 1 and 27 DF, p-value: 0.004798
# Residual diagnostics for mod_logaritmica_x, two panels side by side.
par(mfrow = c(1, 2))
# Left: residuals against the predictor, with a zero reference line.
plot(residuals(mod_logaritmica_x) ~ dimensione, data = dataset)
abline(h = 0, col = "red")
# Right: QQ plot of the residuals. qqPlot() is not base R; presumably
# car::qqPlot loaded outside this excerpt -- TODO confirm.
qqPlot(residuals(mod_logaritmica_x))
## [1] 6 9
# Restore the single-panel layout, as the other diagnostic sections do, so
# that the next plot is not squeezed into half of a two-panel page.
par(mfrow = c(1, 1))
# Log-response model refitted without the rows indexed by `outliers`.
mod_logaritmica_x2 <- lm(
  log(processi) ~ dimensione,
  data = dataset, subset = -outliers
)
# Scatterplot of the reduced data on the log-response scale.
plot(log(processi) ~ dimensione, data = dataset, subset = -outliers)
# Coefficient table and fit statistics for the reduced-data fit.
summary(mod_logaritmica_x2)
##
## Call:
## lm(formula = log(processi) ~ dimensione, data = dataset, subset = -outliers)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.94733 -0.14401 0.08426 0.17129 0.64130
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.25862 0.30605 17.182 1.29e-14 ***
## dimensione -0.18396 0.02308 -7.972 4.56e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5047 on 23 degrees of freedom
## Multiple R-squared: 0.7342, Adjusted R-squared: 0.7227
## F-statistic: 63.54 on 1 and 23 DF, p-value: 4.56e-08
# Compare the two log-response fits on the full scatterplot:
#   red   = mod_logaritmica_x  (all rows)
#   green = mod_logaritmica_x2 (rows in `outliers` excluded)
# Both fits are straight lines on this scale (two coefficients each), so each
# is drawn with abline(). A curve() using a third coefficient evaluates to NA
# for these fits and leaves the reduced-data line off the plot.
plot(log(processi) ~ dimensione, data = dataset)
abline(mod_logaritmica_x, col = "red")
abline(mod_logaritmica_x2, col = "green")
# Residual diagnostics for mod_logaritmica_x2, two panels side by side.
par(mfrow = c(1, 2))
# Left: residuals against the predictor values of the rows actually used in
# the fit (rows in `outliers` dropped), with a zero reference line.
plot(residuals(mod_logaritmica_x2) ~ dimensione[-outliers], data = dataset)
abline(h = 0, col = "red")
# Right: QQ plot of the residuals. qqPlot() is not base R; presumably
# car::qqPlot loaded outside this excerpt -- TODO confirm.
qqPlot(residuals(mod_logaritmica_x2))
## 6 16
## 6 14
# Back to a single-panel layout.
par(mfrow = c(1, 1))
# Linear-log model: processi against the log of the predictor, all rows.
# (Despite the `_y` suffix, the logarithm is applied to the predictor.)
mod_logaritmica_y <- lm(processi ~ log(dimensione), data = dataset)
# Scatterplot on the log-predictor scale with the fitted line in red.
plot(processi ~ log(dimensione), data = dataset)
abline(mod_logaritmica_y, col = "red")
# Coefficient table and fit statistics.
summary(mod_logaritmica_y)
##
## Call:
## lm(formula = processi ~ log(dimensione), data = dataset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26.790 -18.949 -10.488 3.764 69.821
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 86.41 35.38 2.442 0.0214 *
## log(dimensione) -20.28 13.95 -1.454 0.1575
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 27.84 on 27 degrees of freedom
## Multiple R-squared: 0.07261, Adjusted R-squared: 0.03826
## F-statistic: 2.114 on 1 and 27 DF, p-value: 0.1575
# Residual diagnostics for mod_logaritmica_y, two panels side by side.
par(mfrow = c(1, 2))
# Left: residuals against dimensione, with a zero reference line.
plot(residuals(mod_logaritmica_y) ~ dimensione, data = dataset)
abline(h = 0, col = "red")
# Right: QQ plot of the residuals. qqPlot() is not base R; presumably
# car::qqPlot loaded outside this excerpt -- TODO confirm.
qqPlot(residuals(mod_logaritmica_y))
## [1] 8 9
# Restore the single-panel layout, as the other diagnostic sections do, so
# that the next plot is not squeezed into half of a two-panel page.
par(mfrow = c(1, 1))
# Log-predictor model refitted without the rows indexed by `outliers`.
mod_logaritmica_y2 <- lm(
  processi ~ log(dimensione),
  data = dataset, subset = -outliers
)
# Scatterplot of the reduced data on the log-predictor scale.
plot(processi ~ log(dimensione), data = dataset, subset = -outliers)
# Coefficient table and fit statistics for the reduced-data fit.
summary(mod_logaritmica_y2)
##
## Call:
## lm(formula = processi ~ log(dimensione), data = dataset, subset = -outliers)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.8395 -3.4371 0.2184 4.5486 11.1257
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 137.800 8.527 16.16 4.76e-14 ***
## log(dimensione) -45.185 3.426 -13.19 3.29e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.443 on 23 degrees of freedom
## Multiple R-squared: 0.8832, Adjusted R-squared: 0.8781
## F-statistic: 173.9 on 1 and 23 DF, p-value: 3.289e-12
# Compare the two log-predictor fits on the full scatterplot:
#   red   = mod_logaritmica_y  (all rows)
#   green = mod_logaritmica_y2 (rows in `outliers` excluded)
# The horizontal axis is log(dimensione), on which both fits are straight
# lines with two coefficients each, so each is drawn with abline(). A curve()
# using a third coefficient evaluates to NA for these fits and draws nothing.
plot(processi ~ log(dimensione), data = dataset)
abline(mod_logaritmica_y, col = "red")
abline(mod_logaritmica_y2, col = "green")
# Residual diagnostics for mod_logaritmica_y2, two panels side by side.
par(mfrow = c(1, 2))
# Left: residuals against the dimensione values of the rows actually used in
# the fit (rows in `outliers` dropped), with a zero reference line.
plot(residuals(mod_logaritmica_y2) ~ dimensione[-outliers], data = dataset)
abline(h = 0, col = "red")
# Right: QQ plot of the residuals. qqPlot() is not base R; presumably
# car::qqPlot loaded outside this excerpt -- TODO confirm.
qqPlot(residuals(mod_logaritmica_y2))
## 23 3
## 20 3
# Back to a single-panel layout.
par(mfrow = c(1, 1))
# Log-log model: log(processi) against log(dimensione), fitted on all rows.
mod_logaritmica <- lm(log(processi) ~ log(dimensione), data = dataset)
# Scatterplot on the log-log scale with the fitted line in red.
plot(log(processi) ~ log(dimensione), data = dataset)
abline(mod_logaritmica, col = "red")
# Coefficient table and fit statistics.
summary(mod_logaritmica)
##
## Call:
## lm(formula = log(processi) ~ log(dimensione), data = dataset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6441 -0.4204 -0.0970 0.1637 1.8557
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.6319 1.1675 5.680 4.93e-06 ***
## log(dimensione) -1.3797 0.4603 -2.998 0.00578 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9188 on 27 degrees of freedom
## Multiple R-squared: 0.2497, Adjusted R-squared: 0.2219
## F-statistic: 8.986 on 1 and 27 DF, p-value: 0.00578
# Residual diagnostics for mod_logaritmica, two panels side by side.
par(mfrow = c(1, 2))
# Left: residuals against dimensione, with a zero reference line.
plot(residuals(mod_logaritmica) ~ dimensione, data = dataset)
abline(h = 0, col = "red")
# Right: QQ plot of the residuals. qqPlot() is not base R; presumably
# car::qqPlot loaded outside this excerpt -- TODO confirm.
qqPlot(residuals(mod_logaritmica))
## [1] 6 9
# Restore the single-panel layout, as the other diagnostic sections do, so
# that the next plot is not squeezed into half of a two-panel page.
par(mfrow = c(1, 1))
# Log-log model refitted without the rows indexed by `outliers`.
mod_logaritmica2 <- lm(
  log(processi) ~ log(dimensione),
  data = dataset, subset = -outliers
)
# Scatterplot of the reduced data on the log-log scale.
plot(log(processi) ~ log(dimensione), data = dataset, subset = -outliers)
# Coefficient table and fit statistics for the reduced-data fit.
summary(mod_logaritmica2)
##
## Call:
## lm(formula = log(processi) ~ log(dimensione), data = dataset,
## subset = -outliers)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.05396 -0.14892 0.07807 0.25270 0.60856
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.1099 0.7047 11.508 5.07e-11 ***
## log(dimensione) -2.0952 0.2832 -7.399 1.59e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5324 on 23 degrees of freedom
## Multiple R-squared: 0.7042, Adjusted R-squared: 0.6913
## F-statistic: 54.75 on 1 and 23 DF, p-value: 1.593e-07
# Compare the two log-log fits on the full scatterplot:
#   red   = mod_logaritmica  (all rows)
#   green = mod_logaritmica2 (rows in `outliers` excluded)
# On the log-log scale both fits are straight lines with two coefficients
# each, so each is drawn with abline(). A curve() using a third coefficient
# evaluates to NA for these fits and draws nothing.
plot(log(processi) ~ log(dimensione), data = dataset)
abline(mod_logaritmica, col = "red")
abline(mod_logaritmica2, col = "green")
# Residual diagnostics for mod_logaritmica2, two panels side by side.
par(mfrow = c(1, 2))
# Left: residuals against the dimensione values of the rows actually used in
# the fit (rows in `outliers` dropped), with a zero reference line.
plot(residuals(mod_logaritmica2) ~ dimensione[-outliers], data = dataset)
abline(h = 0, col = "red")
# Right: QQ plot of the residuals. qqPlot() is not base R; presumably
# car::qqPlot loaded outside this excerpt -- TODO confirm.
qqPlot(residuals(mod_logaritmica2))
## 6 23
## 6 20
# Back to a single-panel layout.
par(mfrow = c(1, 1))
# Log-log fit on all rows, estimated here again before its summary is printed.
mod_logaritmica <- lm(log(processi) ~ log(dimensione), data = dataset)
# Overview of the data under every transformation tried, on 2 x 2 pages.
par(mfrow = c(2, 2))
plot(processi ~ dimensione, data = dataset)
# Formulas with two right-hand-side terms draw one scatterplot per term.
plot(processi ~ dimensione + I(dimensione ^ 2), data = dataset)
plot(processi ~ dimensione + I(dimensione ^ 3), data = dataset)
plot(processi ~ log(dimensione), data = dataset)
plot(log(processi) ~ dimensione, data = dataset)
plot(log(processi) ~ log(dimensione), data = dataset)
# Back to a single-panel layout.
par(mfrow = c(1, 1))
# Coefficient table and fit statistics of the log-log model.
summary(mod_logaritmica)
##
## Call:
## lm(formula = log(processi) ~ log(dimensione), data = dataset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6441 -0.4204 -0.0970 0.1637 1.8557
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.6319 1.1675 5.680 4.93e-06 ***
## log(dimensione) -1.3797 0.4603 -2.998 0.00578 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9188 on 27 degrees of freedom
## Multiple R-squared: 0.2497, Adjusted R-squared: 0.2219
## F-statistic: 8.986 on 1 and 27 DF, p-value: 0.00578
# Predicted processi, with lower and upper prediction bounds, from mod2 for a
# new observation; 10 is the dimensione value to be checked.
predict(
  mod2,
  newdata = data.frame(dimensione = 10),
  interval = "prediction"
)
## fit lwr upr
## 1 36.30562 21.37121 51.24004
CANCELLARE LE SEGUENTI RIGHE!
Qualche comando LaTeX utile per scrivere le formule
Formule nel testo: \(\sin(\pi x)\)
Formule su righe separate: \[ \sin(\pi x) \]
Momenti: \[ \mu_k, E(X^k), M_k, m_k, \bar{X}, \bar{x}, \sigma^2, Var(X), S^2 \] Stimatori e stime: \[ \hat \theta, \hat \mu, \hat \sigma^2, \hat \lambda \] \[ \tilde \theta, \tilde \mu, \tilde \sigma^2, \tilde \lambda \] Verosimiglianza: \[ L(\theta), \ell(\theta), \ell'(\theta), \ell''(\theta) \] Frazioni: \[ \frac{1}{\sqrt{2}} X_i, \frac{X_i}{Y_i} \]
Sommatorie: \[ \sum_{i=1}^n X_i, \sum_{i=1}^n \log X_i, \sum_{i=1}^n \sqrt{X_i}, \sum_{i=1}^n \frac{X_i}{Y_i} \] Integrali: \[ \int_{0}^1 f(x) dx, \int_{a}^{\infty} f(x) dx, \int_{-\infty}^{\infty} f(x) dx \]
Come scrivere i comandi R
# Template examples showing how R commands and their printed output appear.
# Plain arithmetic with the natural logarithm.
2 * 3 / log(1.2)
## [1] 32.90889
# Trigonometric function using the built-in constant pi.
sin(2 * pi / 20)
## [1] 0.309017
# Multiplication is applied element by element to the vector.
a <- 2 * c(1, 2)
a
## [1] 2 4
# 0.95 quantile of the standard normal distribution.
z <- qnorm(0.95)
z
## [1] 1.644854
# 0.95 quantile of Student's t with 12 degrees of freedom.
# Note: `t` is also the name of base R's transpose function.
t <- qt(0.95, df = 12)
t
## [1] 1.782288